Data

Predictors: body mass interacting with habitat.

Read in data

mammal_fr <- read.csv("data/fr.csv") 

Notes

  • log10fr is the base-10 logarithm of breathing frequency (units not known?)

Cleaning

Rename some variables and create species name from genus and species.

mammal_fr <- mammal_fr %>%
  rename(source = Source,
         order = order.corrected,
        # n.animals = Number.of.Animals,
         mass.kg = body.mass..kg.) %>%
  mutate(order = str_to_title(order),
         genus = str_to_title(genus),
         animal = paste(genus, species),
         above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))

Keep only variables we will be using. And factor() “chr” variables.

mammal_fr <- mammal_fr %>%
  dplyr::select(order,
                genus,
                species, 
                common.name,
                mass.kg,
                log10.body.mass,
                log10fr,
                habitat,
                source,
                animal,
                above.10kg
         ) %>%
  mutate(across(where(is.character), factor)) %>%
  arrange(order, genus, species)

Viz

A few quick graphs just to make sure the data are looking as we expect (error checking).

my_scatter_plot <- gf_point(log10fr ~ log10.body.mass | habitat,
         color = ~order,
         # size = ~parse_number(n_animals),
         data = mammal_fr,
         alpha = 0.5) %>%
  gf_theme(legend.position = 'bottom',
           legend.title = element_text(size = 8),
           legend.text = element_text(size = 6)) %>%
  gf_theme(scale_color_viridis_d('Order')) %>%
  gf_labs(x = 'Log10(Mass (kg))',
          y = 'Log10(fR (breaths/min))') 
my_scatter_plot

plotly::ggplotly(my_scatter_plot) %>%
  plotly::layout(legend = list(#orientation = 'h',
                               font = list(size = 6)))

Notes: If we wanted to use for web or publication, we would refine. We can edit what info is shown when you hover over a data point, change color scheme, legend, etc.

GLS

Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat. Weight by number of individuals (if have data later, don’t have it now)?

lm_model <- lm(log10fr ~ log10.body.mass * habitat,
                 data = mammal_fr)
summary(lm_model)
## 
## Call:
## lm(formula = log10fr ~ log10.body.mass * habitat, data = mammal_fr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0346 -0.1332  0.0048  0.1628  0.7500 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  1.52694    0.12823  11.908  < 2e-16 ***
## log10.body.mass             -0.40700    0.04817  -8.450 2.94e-13 ***
## habitatland                  0.21030    0.13763   1.528 0.129768    
## log10.body.mass:habitatland  0.19517    0.05445   3.584 0.000531 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2659 on 97 degrees of freedom
## Multiple R-squared:  0.7982, Adjusted R-squared:  0.792 
## F-statistic: 127.9 on 3 and 97 DF,  p-value: < 2.2e-16
tab_model(lm_model) 
  log 10 fr
Predictors Estimates CI p
(Intercept) 1.53 1.27 – 1.78 <0.001
log10.body.mass -0.41 -0.50 – -0.31 <0.001
habitat [land] 0.21 -0.06 – 0.48 0.130
log10.body.mass * habitat
[land]
0.20 0.09 – 0.30 0.001
Observations 101
R2 / R2 adjusted 0.798 / 0.792
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
Anova Table (Type II tests)
  Sum Sq Df F value Pr(>F)
log10.body.mass 9.063 1 128.1 1.956e-19
habitat 8.024 1 113.4 5.293e-18
log10.body.mass:habitat 0.9087 1 12.85 0.0005309
Residuals 6.861 97 NA NA

Model Assessment

lm_preds <- predict(lm_model, se.fit = TRUE)

mammal_fr <- mammal_fr  %>% mutate(lm_resids = resid(lm_model),
         lm_fitted = lm_preds$fit,
         lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
         lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_fr)

s245::gf_acf(~lm_model)

gf_dhistogram(~lm_resids, data = mammal_fr,
              bins = 20) %>%
  gf_fitdistr()

Model Predictions

Any predictors not shown in a plot are held constant at their mean or most common value.

gf_line(lm_fitted ~ log10.body.mass,
         color = ~habitat,
         data = mammal_fr) %>%
  gf_ribbon(lm_fit_lo + lm_fit_hi ~ log10.body.mass,
            color = ~habitat, fill = ~habitat) %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors))

gf_point(log10fr ~ lm_fitted, data = mammal_fr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(fR)',
          x = 'Model-predicted log10(fR)',
          title = 'Linear Regresssion (no phylogeny)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

Mixed-effects model

Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.

re_model <- glmmTMB(log10fr ~ log10.body.mass * habitat + (1 | order/genus/species),
                 data = mammal_fr)
summary(re_model)
##  Family: gaussian  ( identity )
## Formula:          
## log10fr ~ log10.body.mass * habitat + (1 | order/genus/species)
## Data: mammal_fr
## 
##      AIC      BIC   logLik deviance df.resid 
##     10.3     31.2      2.8     -5.7       93 
## 
## Random effects:
## 
## Conditional model:
##  Groups                Name        Variance Std.Dev.
##  species:(genus:order) (Intercept) 0.01268  0.1126  
##  genus:order           (Intercept) 0.01781  0.1334  
##  order                 (Intercept) 0.05066  0.2251  
##  Residual                          0.01752  0.1324  
## Number of obs: 101, groups:  
## species:(genus:order), 101; genus:order, 85; order, 10
## 
## Dispersion estimate for gaussian family (sigma^2): 0.0175 
## 
## Conditional model:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  1.53600    0.14947  10.276  < 2e-16 ***
## log10.body.mass             -0.42091    0.04738  -8.884  < 2e-16 ***
## habitatland                  0.05191    0.13167   0.394    0.693    
## log10.body.mass:habitatland  0.23521    0.05120   4.594 4.36e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model) 
  log 10 fr
Predictors Estimates CI p
(Intercept) 1.54 1.24 – 1.83 <0.001
log10.body.mass -0.42 -0.51 – -0.33 <0.001
habitat [land] 0.05 -0.21 – 0.31 0.693
log10.body.mass * habitat
[land]
0.24 0.13 – 0.34 <0.001
Random Effects
σ2 0.02
τ00 species:(genus:order) 0.01
τ00 genus:order 0.02
τ00 order 0.05
ICC 0.80
N species 98
N genus 85
N order 10
Observations 101
Marginal R2 / Conditional R2 0.730 / 0.945
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
Analysis of Deviance Table (Type II Wald chisquare tests)
  Chisq Df Pr(>Chisq)
log10.body.mass 70.12 1 5.581e-17
habitat 88.75 1 4.474e-21
log10.body.mass:habitat 21.1 1 4.355e-06

Model Assessment

re_ave_preds <- predict(re_model, 
                    se.fit = TRUE,
                    re.form = ~0)
re_ind_preds <- predict(re_model,
                        se.fit = TRUE,
                        re.form = NULL)
mammal_fr <- mammal_fr %>%
  mutate(re_resids = resid(re_model),
         re_ind_fitted = re_ind_preds$fit,
         re_ave_fitted = re_ave_preds$fit,
         re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
         re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)

# save fitted model and data
saveRDS(mammal_fr, 'fitted-models/fr-data.RDS')
saveRDS(re_model, 'fitted-models/fr-re-model.RDS')


gf_point(re_resids ~ re_ind_fitted, data = mammal_fr)

acf(resid(re_model))

#gf_acf(~re_model)
gf_dhistogram(~re_resids, data = mammal_fr) %>%
  gf_fitdistr()

Predictions from Model

gf_line(re_ave_fitted ~ log10.body.mass,
         color = ~habitat,
         data = mammal_fr) %>%
  gf_ribbon(re_ave_lo + re_ave_hi ~ log10.body.mass,
            color = ~habitat, fill = ~habitat) %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors))

gf_point(log10fr ~ re_ind_fitted, data = mammal_fr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(fr)',
          x = 'Model-predicted, Species-specific log10(fr)',
          title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

The graph above is a bit “cheating” as we have a random effect of species, but there are only 1-2 measurements for most of the species (nearly guaranteeing that our estimates will be nearly perfect). What if we also check out the predictions accounting for the modeled effects of Order and Genus, but predicting for the “average” species in each Genus?

no_species <- mammal_fr %>%
  mutate(species = NA)
re_genus_level_preds <- predict(re_model, 
                    se.fit = TRUE,
                    re.form = NULL,
                    newdata = no_species)

gf_point(log10fr ~ re_genus_level_preds$fit, data = mammal_fr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(fH)',
          x = 'Model-predicted, Genus-specific log10(fr)',
          title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

PGLS

Read in Tree Data

#Read in the trees from Upham et al
#Upham et al provide four sets of 10000 trees
# ??? There are 101 files of trees in the directory?
# original code sampled 4x10 trees from a set of 100
# here I am just using all 101 that are there, is that oK?

tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)

all_trees <- list()

for (i in 1:length(tree_files)){
  all_trees[[i]] <- read.tree(paste0(tree_path, '/',
                                     tree_files[i]))
  if (i == 1){
    treeset <- all_trees[[i]]
  }else{
    treeset <- c(treeset, all_trees[[i]])
  }
}

all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
                             function(x) 
                               stringr::str_replace_all(x, pattern = '_',
                                                      replacement = ' '))

# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
  if (t == 1){
    tip_labs <- all_tip_labels[[t]]
  }else{
    tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
  }
}

                            
# keep only the species in mammal_bmr that are in all the trees
# on 4/14 this removes 1 species.
pgls_data <- mammal_fr %>%
  filter(animal %in% tip_labs) %>%
  droplevels()


taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')

Fit models, one model for every tree in our list.

pgls_models <- list()

for (t in c(1:length(treeset))){
  # make sure there is only one row of data per species (seems dubious??)
  pgls_rep_data <- pgls_data %>% 
    group_by(animal) %>%
    sample_n(1) %>%
    ungroup
  
  
  #Reduce the tree to only include those species in the data set
  refit_tree <- treeset[[t]]
  refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
  refit_tree <- drop.tip(refit_tree, 
                         setdiff(refit_tree$tip.label, 
                                 levels(pgls_rep_data %>% pull(animal))))
  
  #Order the data set so that it is in the same order as the tip labels of the tree
  pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
                             pgls_rep_data,
                             by = c('tree.tip.label' = 'animal'),
                             keep = TRUE)
  
  # fit the model
 pgls_models[[t]] <- tryCatch({
    fittd <- gls(log10fr ~ log10.body.mass * habitat,
                   correlation = corPagel(value = 0.8, 
                                          phy = refit_tree, 
                                          fixed = FALSE, 
                                          form = ~animal),
                   data = pgls_rep_data)
    },
  error = function(cond){
    message(paste('PGLS fit failed for tree', t))
    return(NULL)
  }
  )
}
pgls_models <- pgls_models[!sapply(pgls_models, is.null)]

Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 0.

Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.

# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)  
# # pool summarise the models using Rubin's rule corrected for small samples
#summary(pgls_mira)
pooled_pgls   <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)
#pooled_pgls_summ <- summary(pool(pgls_mira, type = 'all', conf.int = TRUE))

pander(pooled_pgls_summ %>% dplyr::select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
Table continues below
term estimate std.error 2.5 % 97.5 %
(Intercept) 1.609 0.1827 1.247 1.972
log10.body.mass -0.415 0.04703 -0.5084 -0.3216
habitatland 0.1285 0.1379 -0.1453 0.4024
log10.body.mass:habitatland 0.1935 0.05439 0.08551 0.3015
lambda fmi
0.001209 0.0216
0.0006789 0.02107
0.001403 0.0218
0.0139 0.03429
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
Analysis of Deviance Table (Type II tests) (continued below)
  F num df den df missing info riv
log10.body.mass 145.8 1 42780 0.04647 0.04873
habitat 85.92 1 13315 0.08337 0.09095
log10.body.mass:habitat 12.66 1 477466 0.0139 0.0141
  Pr(>F)
log10.body.mass 1.644e-33
habitat 2.156e-20
log10.body.mass:habitat 0.0003741

Alternative approach: using MuMIn to do model averaging. This will not necessarily weight all of the models/trees equally.

pgls_avg <- model.avg(pgls_models, 
                      rank = function(x) 1)
lambda <- mean(unlist(purrr::map(pgls_models, function(x) x$modelStruct$corStruct)))
lambda
## [1] 0.7097979